install.packages("ggplot2")
install.packages("gganimate")
install.packages("dplyr")
install.packages("tidyr")
install.packages("stringr")
install.packages("geosphere")
install.packages("leaflet")
install.packages("tigris")
install.packages("sp")
install.packages("ggmap")
install.packages("maptools")
install.packages("broom")
install.packages("httr")
install.packages("rgdal")
install.packages("rmarkdown")
library(dplyr)
library(gganimate)
library(tidyr)
library(stringr)
library(geosphere)
library(leaflet)
library(rmarkdown)
# Structure of the data
citi <- read.csv("citicleaned.csv")

# Additional cleaning
citi$start.station.name <- as.factor(citi$start.station.name)
citi$end.station.name <- as.factor(citi$end.station.name)

# Ideas:
  # Stations with the highest number of riders departing
  # Stations with the highest number of riders arriving
  # Following bikes through their routes gif
  # Daily traffic volume (number of rides) by neighborhood?
  # Volume of traffic by neighborhood 
    # Probably have to add neighborhood columns, correlating with the lat/long of each station
  # Average bike deficit and surplus across different neighborhoods
  # % Utilization based on % of highest utilized station (based on total = number of arrivals + number of departures?)
    # We know the total number of bikes (based on bike id). So can we show the % utilization based on the stations with the highest % of bikes starting and /or stopping at that station daily/yearly?
      # Make daily gif of station utilization?
  # Most popular routes start.station to end.station - show top 5-10 traffic patterns?
library(dplyr)
library(leaflet)

# Stations with the highest number of departures
departures <- citi %>%
  group_by(station = `start.station.name`, latitude = `start.station.latitude`, longitude = `start.station.longitude`) %>%
  summarise(departure_count = n())

top_departures_sort <- head(departures[order(departures$departure_count, decreasing = TRUE),], n = 10)

departure_map <- leaflet(top_departures_sort) %>%
  addTiles() %>%
  setView(lat = 40.749773343979214, lng = -73.98552506559191, zoom = 12) %>%
  addCircleMarkers(lat = top_departures_sort$latitude, lng = top_departures_sort$longitude,
                   popup = paste(top_departures_sort$station, "<br>", top_departures_sort$departure_count, "<br>"),
                   radius = (top_departures_sort$departure_count / 100), color = "navy")
departure_map
# Stations with the highest number of arrivals
arrivals <- citi %>%
  group_by(station = `end.station.name`, latitude = `end.station.latitude`, longitude = `end.station.longitude`) %>%
  summarise(arrival_count = n())

top_arrivals_sort <- head(arrivals[order(arrivals$arrival_count, decreasing = TRUE),], n = 10)

arrival_map <- leaflet(top_arrivals_sort) %>%
  addTiles() %>%
  setView(lat = 40.749773343979214, lng = -73.98552506559191, zoom = 12) %>%
  addCircleMarkers(lat = top_arrivals_sort$latitude, lng = top_arrivals_sort$longitude,
                   popup = paste(top_arrivals_sort$station, "<br>", top_arrivals_sort$arrival_count, "<br>"),
                   radius = (top_arrivals_sort$arrival_count / 100), color = "navy")
arrival_map          
library(tigris)
library(dplyr)
library(leaflet)
library(sp)
library(ggmap)
library(maptools)
library(broom)
library(httr)
library(rgdal)

# Loading NYC Geospatial Data from Open-Source Library
nyc_boroughs_data <- GET('http://data.beta.nyc//dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson')

#Reading the spatial data
neighborhoods <- readOGR(content(nyc_boroughs_data, 'text'), 'OGRGeoJSON', verbose = F)


nyc <- data.frame(lat= citi$start.station.latitude, lng= citi$start.station.longitude, start.station.name = citi$start.station.name)

# Testing NYC Neighborhoods Map
nyc_neighborhoods_map <- leaflet(neighborhoods) %>%
  addTiles() %>%
  addPolygons(popup = ~neighborhood, color = "navy", weight = 2) %>%
  addProviderTiles("CartoDB.Positron")
nyc_neighborhoods_map
# Number of Stations by Neighborhood
nyc <- nyc %>%
  group_by(station = `start.station.name`, lat = `lat`, lng = `lng`) %>%
  summarise(departure_count = n())

nyc_spdf <- nyc
coordinates(nyc_spdf) <- ~lng + lat
proj4string(nyc_spdf) <- proj4string(neighborhoods)
matches <- over(nyc_spdf, neighborhoods)
nyc <- cbind(nyc, matches)

# Cleaning up Data
nyc$neighborhood <- as.factor(nyc$neighborhood)
nyc$borough <- as.factor(nyc$borough)
nyc$boroughCode <- as.factor(nyc$boroughCode)

# Stations by Neighborhood, Sized by Number of Departures
dep_size_map <- leaflet(neighborhoods) %>%
  addTiles() %>% 
  addPolygons(popup = ~neighborhood, color = "navy", weight = 2) %>% 
  addCircleMarkers(~lng, ~lat,popup = paste(nyc$station, "<br>", nyc$neighborhood, "<br>", nyc$borough, "<br>", nyc$departure_count, "Departures"), data = nyc,
                   color = "yellow", radius = nyc$departure_count / 100) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 13)
dep_size_map
# Grouping Number of Stations by Neighborhood
stations_by_neighborhood <- nyc %>%
  group_by(neighborhood) %>%
  summarize(num_stations = n())

# Merging Spatial Polygon Map with Grouped Dataframe
map_data <- geo_join(neighborhoods, stations_by_neighborhood, by_sp = "neighborhood", by_df = "neighborhood")

map_data <- subset(map_data, num_stations != "NA")

pal <- colorNumeric(palette = "Greens",
                    domain = range(map_data@data$num_stations, na.rm = TRUE))

# Neighborhoods Color-Coded by Number of CitiBike Stations
num_stations_by_neighborhood <- leaflet(map_data) %>%
  addTiles() %>%
  addPolygons(fillColor = ~pal(num_stations), popup = paste(map_data$neighborhood, "<br>", map_data$borough,
                                                            "<br>",map_data$num_stations,"stations"),
              color = "black", weight = 1) %>%
  addLegend(pal = pal, values = ~num_stations, title = "Number of Stations") %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-73.98, 40.75, zoom = 12)
# num_stations_by_neighborhood <- addLegend(map = num_stations_by_neighborhood, colors = )
num_stations_by_neighborhood
# Creating New Dataframe for Utilization 
citi_util <- merge(departures, arrivals)

# Utilization Rate
citi_util$util_rate <- citi_util$departure_count / (citi_util$departure_count + citi_util$arrival_count)

# Stations Sized by Utilization Rate
util_map <- leaflet(citi_util) %>%
  addTiles() %>%
  addCircles(lat = citi_util$latitude, lng = citi_util$longitude, 
                   popup = paste(citi_util$station, "<br>", (round(citi_util$util_rate * 100, 1)), "%"),
                   radius = (citi_util$util_rate*10)^2 , color = "navy", fillOpacity = 1)
util_map
# Setting up Google Maps for GGMap
library(ggmap)
register_google(key = "AIzaSyA5mOf170yvTi2Z4G711dLQ82TiEMWexnY", account_type = "standard")
base_nyc <- get_map(location = c(lon = -73.9857, lat =40.7484 ), zoom = 12, size = c(640,640) )

# Cleaning Data
library(lubridate)
citi$starttime <- as.POSIXct(strptime(citi$starttime, "%Y-%m-%d %H:%M:%S"))
citi$stoptime <- as.POSIXct(strptime(citi$stoptime, "%Y-%m-%d %H:%M:%S"))
citi$starthour <- hour(citi$starttime)
citi$day <- as.Date(citi$starttime)
citi$month <- as.factor(month(citi$starttime))
citi$numWeekday <- wday(citi$starttime)
citi$dayid <- as.factor(ifelse(citi$numWeekday < 6, "Weekday", "Weekend"))
citi$weekNum <- as.numeric(strftime(citi$starttime, format = "%V"))


citi_ggmap_2 <- citi %>%
  group_by(day = day, starttime = starttime, lng = start.station.longitude, lat = start.station.latitude,
           station = start.station.name) %>%
  summarize(dep = n()) %>%
  filter(day == "2019-06-01")

citi_ggmap_2
## # A tibble: 777 x 6
## # Groups:   day, starttime, lng, lat [777]
##    day        starttime             lng   lat station                  dep
##    <date>     <dttm>              <dbl> <dbl> <fct>                  <int>
##  1 2019-06-01 2019-05-31 20:01:05 -74.0  40.7 Clinton St & Union St      1
##  2 2019-06-01 2019-05-31 20:02:30 -74.0  40.7 1 Ave & E 16 St            1
##  3 2019-06-01 2019-05-31 20:05:57 -74.0  40.7 E 24 St & Park Ave S       1
##  4 2019-06-01 2019-05-31 20:08:32 -74.0  40.7 S 3 St & Bedford Ave       1
##  5 2019-06-01 2019-05-31 20:09:35 -74.0  40.7 Barclay St & Church St     1
##  6 2019-06-01 2019-05-31 20:10:29 -73.9  40.8 40 Ave & 9 St              1
##  7 2019-06-01 2019-05-31 20:11:25 -74.0  40.7 W 26 St & 8 Ave            1
##  8 2019-06-01 2019-05-31 20:12:53 -74.0  40.7 W 15 St & 10 Ave           1
##  9 2019-06-01 2019-05-31 20:13:11 -74.0  40.7 E 11 St & 1 Ave            1
## 10 2019-06-01 2019-05-31 20:16:15 -74.0  40.8 12 Ave & W 40 St           1
## # ... with 767 more rows
library(ggplot2)
library(gganimate)

# Animated Departure Map over One Day
dep_map <- ggmap(base_nyc) +
  geom_point(citi_ggmap_2, mapping = aes(x = citi_ggmap_2$lng, y = citi_ggmap_2$lat), size = 5, color = "red") +
  transition_states(citi_ggmap_2$starttime) +
  shadow_mark(color = "black")
dep_map

citi_ggmap_3 <- citi %>%
  group_by(day = day, stoptime = stoptime, lng = end.station.longitude, lat = end.station.latitude,
           station = end.station.name) %>%
  summarize(arr = n()) %>%
  filter(day == "2019-06-01")
citi_ggmap_3
## # A tibble: 777 x 6
## # Groups:   day, stoptime, lng, lat [777]
##    day        stoptime              lng   lat station                        arr
##    <date>     <dttm>              <dbl> <dbl> <fct>                        <int>
##  1 2019-06-01 2019-05-31 20:09:01 -74.0  40.7 Douglass St & 3 Ave              1
##  2 2019-06-01 2019-05-31 20:13:49 -74.0  40.8 W 27 St & 10 Ave                 1
##  3 2019-06-01 2019-05-31 20:16:09 -74.0  40.8 E 58 St & 3 Ave                  1
##  4 2019-06-01 2019-05-31 20:18:05 -74.0  40.7 W 4 St & 7 Ave S                 1
##  5 2019-06-01 2019-05-31 20:21:15 -73.9  40.7 Graham Ave & Grand St            1
##  6 2019-06-01 2019-05-31 20:21:48 -74.0  40.7 W 4 St & 7 Ave S                 1
##  7 2019-06-01 2019-05-31 20:22:27 -74.0  40.7 Plaza St West & Flatbush Ave     1
##  8 2019-06-01 2019-05-31 20:24:59 -74.0  40.7 E 15 St & 3 Ave                  1
##  9 2019-06-01 2019-05-31 20:26:03 -74.0  40.7 E 31 St & 3 Ave                  1
## 10 2019-06-01 2019-05-31 20:26:40 -74.0  40.8 W 74 St & Columbus Ave           1
## # ... with 767 more rows
# Animated Arrival Map over One Day
library(ggcats)
arr_map <- ggmap(base_nyc) +
  geom_cat(citi_ggmap_3, mapping = aes(x = citi_ggmap_3$lng, y = citi_ggmap_3$lat), size = 4, cat = "toast") +
  transition_states(citi_ggmap_3$stoptime) +
  shadow_mark(past = TRUE)
arr_map

citi_ggmap_4 <- citi %>%
  group_by(day = day, station = start.station.name, lng = start.station.longitude, lat = start.station.latitude) %>%
  summarize(dep = n())
citi_ggmap_4
## # A tibble: 117,411 x 5
## # Groups:   day, station, lng [117,382]
##    day        station                   lng   lat   dep
##    <date>     <fct>                   <dbl> <dbl> <int>
##  1 2019-01-01 1 Ave & E 16 St         -74.0  40.7     1
##  2 2019-01-01 1 Ave & E 18 St         -74.0  40.7     1
##  3 2019-01-01 1 Ave & E 18 St         -74.0  40.7     1
##  4 2019-01-01 1 Ave & E 30 St         -74.0  40.7     1
##  5 2019-01-01 1 Ave & E 78 St         -74.0  40.8     1
##  6 2019-01-01 11 Ave & W 27 St        -74.0  40.8     1
##  7 2019-01-01 28 Ave & 35 St          -73.9  40.8     1
##  8 2019-01-01 28 St & 36 Ave          -73.9  40.8     1
##  9 2019-01-01 3 Ave & E 112 St        -73.9  40.8     1
## 10 2019-01-01 3 Ave & Schermerhorn St -74.0  40.7     1
## # ... with 117,401 more rows
# Animate Heat Map of Departures Over ~2 Months
heatmap <- ggmap(base_nyc) +
  geom_bin_2d(data = citi_ggmap_4, mapping = aes(x = citi_ggmap_4$lng, y = citi_ggmap_4$lat), bins = 40) +
  transition_states(citi_ggmap_4$day) +
  labs(title = "Date: {closest_state}")
heatmap